Convert Python pie charts into a stacked plot showing relative percentages across time.


In [150]:
%matplotlib inline

import csv

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib import cm
from pandas import DataFrame, Series
pd.set_option("display.max_columns", None)

Load the data into a pandas DataFrame


In [62]:
# Tab-delimited readers over the two cluster tables. csv.reader is lazy, so
# the underlying file handles are deliberately left open here.
# NOTE(review): nothing in the visible cells consumes ManClus/AllClus --
# presumably a cell that is not shown builds the lookup hashes from them; confirm.
ManClus=csv.reader(open('MMETSP_140513_Cluster.tab'),delimiter='\t') #Manual clusters

AllClus=csv.reader(open('MMETSP_HigherOrder.tab'),delimiter='\t')

# Font type 42 makes matplotlib embed TrueType fonts in saved PDF figures.
mpl.rcParams['pdf.fonttype'] = 42

# Column names applied to the summed-species table; the first column is used
# as the row index (index_col=0).
Columns=["S1", "S2", "S3", "S4", 'S5', 'A', 'B', 'C', 'D', 'E']

# read_table does not close a handle it is given, so read inside a `with`
# block to release the file as soon as the table has been parsed.
with open('SummedSpecies.tab') as ClusCount:
    data=pd.read_table(ClusCount, names=Columns, index_col=0)

Create two hashes (dictionaries), one at the phylum level and one at the genus level, which are used to sort the counts into two new pandas DataFrames.


In [149]:
# Group the keys of MC_hash by the two taxonomic fields looked up in MMETSP_Hash.
#   GenusHash:  G -> [S, ...]
#   PhylumHash: P -> [[S, ...], [G, ...]]   (two parallel lists)
# NOTE(review): MC_hash and MMETSP_Hash are not defined in any visible cell;
# they must already exist in the kernel for this cell to run -- confirm where
# they are built so the notebook survives Restart & Run All.
GenusHash={}
PhylumHash={}

for S in MC_hash:
    key=MC_hash[S][0]        # first field of the MC_hash entry is the MMETSP_Hash key
    G=MMETSP_Hash[key][3]    # field 3 keys GenusHash
    P=MMETSP_Hash[key][0]    # field 0 keys PhylumHash

    # setdefault creates the empty container on first sight of a key and
    # returns the existing one afterwards.
    GenusHash.setdefault(G, []).append(S)

    species_list, genus_list = PhylumHash.setdefault(P, [[], []])
    species_list.append(S)
    genus_list.append(G)

In [169]:
def _sum_counts_by_group(counts, groups, index):
    """Sum the columns of `counts` into one column per group.

    Parameters
    ----------
    counts : DataFrame
        One column per member (plus an 'Unaligned' column).
    groups : dict
        Maps a group name to the list of member column names belonging to it.
        Members that are not columns of `counts` are skipped.
    index : list
        Row labels of the returned frame.

    Returns
    -------
    DataFrame
        One column per group that had at least one member present in
        `counts`, followed by the 'Unaligned' column copied from `counts`.
    """
    # Build the membership set once instead of once per member.
    available = set(counts.columns.values)
    summed = pd.DataFrame(index=index)
    for group, members in groups.items():
        for member in members:
            if member not in available:
                continue
            if group in summed:
                summed[group] = summed[group] + counts[member]
            else:
                summed[group] = counts[member]
    summed['Unaligned'] = counts['Unaligned']
    return summed


# Transposed counts table, so the former row labels of `data` become columns.
Species_Counts=data.T

# Phylum level: PhylumHash values are [species_list, genus_list]; only the
# species list (element 0) is needed for the summation.
Pdf=_sum_counts_by_group(
    Species_Counts,
    {phylum: lists[0] for phylum, lists in PhylumHash.items()},
    Columns,
)

# Genus level: GenusHash values are already plain species lists.
Gdf=_sum_counts_by_group(Species_Counts, GenusHash, Columns)

In [146]:
# Append a 'Mean' row holding the per-column mean, then display the transposed
# table sorted by that mean in ascending order.
# NOTE: this adds the 'Mean' row to Gdf in place.
Gdf.loc['Mean']=Gdf.mean(axis=0)
# DataFrame.sort was removed from pandas; sort_values is its replacement.
Gdf.T.sort_values('Mean', ascending=True)


Out[146]:
S1 S2 S3 S4 S5 A B C D E Mean
Oxyrrhis 5495 5082 5815 2056 2224 2803 2825 3006 1924 2923 3415.3
Perkinsus 21326 12707 22504 14217 14144 11759 10868 12502 13290 12794 14611.1
Sarcinochrysis 44351 15862 36329 26673 18740 20649 15480 17881 13185 16893 22604.3
Timspurckia 32712 13640 44248 22243 34377 25266 21485 22243 36013 24331 27655.8
Prasinococcus 44424 29702 45506 21549 22638 24619 24267 26574 19301 25417 28399.7
Synchroma 61292 34702 51460 19675 22528 30458 27075 31681 16442 28039 32335.2
Picocystis 67230 21557 51379 24928 32539 27643 28742 30404 26402 29888 34071.2
Minchinia 57921 83642 23731 13545 44687 13874 13150 44289 9585 77749 38217.3
Lankesteria 42980 39101 56523 29045 48142 34702 26919 32299 39133 34150 38299.4
Amoebophrya 83500 31501 65189 27281 33703 34025 34543 35444 30562 34054 40980.2
Condylostoma 46384 17637 67559 38150 49102 39596 29918 30677 59428 34971 41342.2
Trichosphaerium 51196 18847 61892 37560 54356 40517 32947 34100 55158 36383 42295.6
Chlamydomonas 71439 41010 58166 39169 32219 37646 29840 44378 28959 41866 42469.2
Uronema 60014 23324 70558 41785 44347 41839 27011 31672 56308 36713 43357.1
Neobodo 67197 46520 73647 26314 33450 43203 38531 44210 25293 41847 44021.2
Chrysocystis 73537 43393 76378 25384 35400 37970 35097 44113 25804 43246 44032.2
Pinguiococcus 64077 54404 71369 28259 34094 39176 36022 43450 31331 41004 44318.6
Crustomastix 69467 41677 74150 63823 29648 38140 34793 39133 19889 34950 44567.0
Fibrocapsa 69123 22816 65410 53145 58866 38089 30670 33332 41741 33113 44630.5
Chrysoculter 70658 49899 76508 24423 32546 48452 38836 40246 25683 39220 44647.1
Rhodella 99195 36973 69448 30697 38003 43232 38528 42182 32694 40081 47103.3
Vitrella 67775 46531 77068 37083 37902 49471 37930 44600 36031 44242 47863.3
Alveolata 82929 48728 79218 37293 38150 42961 40356 48138 31888 47959 49762.0
Porphyridium 76124 45884 65037 37156 45085 68788 33575 36078 54790 38752 50126.9
Filamoeba 53691 26423 79229 41751 66774 45478 39160 41796 64057 44925 50328.4
Chromulina 76197 21914 90430 41766 57386 52199 34226 34392 70151 37786 51644.7
Pterosperma 72794 43413 83609 44423 46496 47968 45025 49838 47690 50623 53187.9
Erythrolobus 86772 98369 75254 31063 41748 43964 38142 41551 34070 41100 53203.3
Cyanoptyche 95848 52299 83179 35398 48457 52153 43671 43759 38425 45902 53909.1
Dolichomastix 87455 53314 88459 35848 43191 51475 46164 52475 32967 50148 54149.6
... ... ... ... ... ... ... ... ... ... ... ...
Florenciella 472524 454798 1198168 170339 239962 328677 284907 332443 156717 311368 394990.3
Rhizosolenia 1141270 133702 556457 582048 440004 221934 179224 203500 288338 205450 395192.7
Amphiprora 562785 245603 688787 433806 592998 309239 283493 319290 366718 314497 411721.6
Thalassionema 900396 236044 641783 402831 560453 328519 251578 295933 460465 291054 436905.6
Favella 802630 577897 371488 703186 336076 270859 567000 473170 220296 631053 495365.5
Emiliania 948582 596379 763768 225727 297648 432402 425275 585429 193941 550461 501961.2
Nitzschia 588258 347052 836513 427638 741976 409430 369243 428911 503947 438303 509127.1
Cyclotella 3094234 273977 421000 250054 407094 181863 113618 129122 226273 132783 523001.8
Karenia 878897 597573 980623 395188 474343 465843 478683 537359 434677 542237 578542.3
Tetraselmis 993069 700475 885700 344276 494022 520809 509276 559260 344142 594487 594551.6
Fragilariopsis 634817 501199 935347 497390 719856 567393 415665 505438 684306 527857 598926.8
Ditylum 765723 370135 937166 783361 756957 562027 414261 483642 727527 458711 625951.0
Asterionellopsis 704117 305027 981471 768072 883128 573541 409498 456427 750363 465223 629686.7
Scrippsiella 1143598 983865 1117791 309321 468196 528950 518061 596630 347087 543064 655656.3
Ostreococcus 1468550 695260 1144626 519160 642132 576795 535169 579169 475004 563311 719917.6
Aureococcus 442610 570037 2003765 208095 163142 1411039 806218 726850 544200 823257 769921.3
Ochromonas 856914 1255959 926005 429015 1214861 615061 453624 743113 549905 780845 782530.2
Eucampia 308732 100938 1519114 1365998 1640851 923686 600311 586003 1285334 628844 895981.1
Chrysochromulina 1184397 885416 1652545 393677 435511 1130910 790394 1249764 407324 1204523 933446.1
Leptocylindrus 880379 435877 2085248 808451 657835 1218134 699387 799630 1746769 788917 1012062.7
Symbiodinium 1783005 883643 1515818 737959 813534 855735 899512 994441 772727 1270097 1052647.1
Alexandrium 1571351 1328213 1856871 656863 769525 1010406 1107838 1118418 581253 1034983 1103572.1
Pseudo-nitzschia 1624947 1125490 1752902 1253828 1405407 1010405 768683 970658 1108454 973427 1199420.1
Thalassiosira 2541854 970231 1705645 1209202 1231799 917525 776847 850059 1056696 930651 1219050.9
Bigelowiella 1269766 2316754 1455238 648685 1751739 905403 716532 1263613 696424 1242217 1226637.1
Chaetoceros 2263859 1263133 2355138 1746462 1882953 1176883 861175 1016282 1513794 1174315 1525399.4
Undescribed 2935517 1902924 2457566 1010059 1429618 1415128 1206454 1440897 910060 1445559 1615378.2
Prorocentrum 1228019 1834701 9327860 1027963 363883 3528151 2753080 3876026 1678940 3220500 2883912.3
Skeletonema 13783563 22638516 4725290 2202391 1769058 1996271 1003058 1139408 2530080 1277664 5306529.9
Unaligned 24653949 22353769 40152626 21691482 27363806 27633398 18611535 22285626 27784295 22801707 25533219.3

187 rows × 11 columns


In [247]:
#Create the final table for plotting: the Pdf columns plus three columns
#taken from Gdf and shown on their own.
Fdf=Pdf.copy()
Fdf['Skeletonema']=Gdf['Skeletonema']
Fdf['Thalassiosira']=Gdf['Thalassiosira']
Fdf['Prorocentrum']=Gdf['Prorocentrum']
# Subtract the broken-out columns from the Bacillariophyta / Dinophyta totals.
# NOTE(review): presumably so that these counts are not included twice -- confirm.
Fdf['Bacillariophyta']=Fdf['Bacillariophyta']-Fdf['Skeletonema']-Fdf['Thalassiosira']
Fdf['Dinophyta']=Fdf['Dinophyta']-Fdf['Prorocentrum']
Col_order=['Unaligned','Bacillariophyta','Skeletonema', 'Thalassiosira', 'Dinophyta', 'Prorocentrum', 'Ochrophyta', 'Chlorophyta', 'Ciliophora']

#Collapse every column not named above into a single "Other" category
Cols=[col for col in Fdf.columns if col not in Col_order]
# skipna=False keeps the NaN-propagating behaviour of plain `+` addition.
Fdf['Other']=Fdf[Cols].sum(axis=1, skipna=False)
Fdf=Fdf.drop(Cols, axis=1)

#Reorder: 'Other' first, 'Unaligned' last
Col_order.append('Other')
Col_order=Col_order[::-1]
Fdf=Fdf[Col_order]
Fdf['sum']=Fdf.sum(axis=1)

#Calculate each column as a fraction of its row total (values in 0-1)
Fdf_percentage=Fdf[Col_order].div(Fdf['sum'], axis=0)

In [259]:
#Plot stack plot of the per-sample fractions for samples S1-S5
samples=['S1','S2','S3','S4','S5']
Fdf_plot=Fdf_percentage.T[samples]   # rows = stacked layers, columns = samples

# One colour per stacked layer, sampled from the 'hot' colormap. The range
# stops at 0.9 because the top of 'hot' is pure white, which would vanish on
# a white background. (cm.hot(10) is a single colour, and stackplot takes its
# palette through `colors=`, not `color=`.)
c=list(cm.hot(np.linspace(0, 0.9, len(Fdf_plot))))

x_positions=np.arange(len(samples))
fig=plt.figure()
ax=fig.add_subplot(111)
ax.stackplot(x_positions, Fdf_plot.values, labels=list(Fdf_plot.index), colors=c)
ax.set_xticks(x_positions)
ax.set_xticklabels(samples)
ax.set_xlabel('Sample')
ax.set_ylabel('Fraction of total counts')
ax.margins(0, 0)
# Legend goes outside the axes so it does not cover the stacked areas;
# bbox_inches='tight' keeps it inside the saved PDF.
ax.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
fig.savefig('StackPlot.pdf', bbox_inches='tight')



In [246]:
# Display Col_order in reverse order; the slice builds a new list and leaves
# Col_order itself unchanged.
Col_order[::-1]


Out[246]:
['Other',
 'Ciliophora',
 'Chlorophyta',
 'Ochrophyta',
 'Prorocentrum',
 'Dinophyta',
 'Thalassiosira',
 'Skeletonema',
 'Bacillariophyta',
 'Unaligned']

In [ ]:
load  # NOTE(review): lone token in a cell with no execution count; `load` is not defined in any visible cell -- looks like unfinished input, confirm and remove